# Start from a clean workspace: remove every object that ls() lists in the
# current environment (names beginning with a dot are not listed by default).
# Attached packages and options() are not affected by this call.
rm(list = ls())

library(ggplot2)
library(plyr)
library(reshape2)

## Create data set

# Raw cabinet table; cab.raw is kept unmodified next to the working copy.
cab.raw <- read.csv("data/cabinet.csv", as.is = TRUE, fileEncoding = "utf-8")

# Working copy whose column names use dots in place of underscores.
cab <- setNames(cab.raw, gsub("_", ".", names(cab.raw), fixed = TRUE))

# Add the columns of data/electoral-system.csv, matching rows on the short
# country name. merge() defaults to keeping only keys found in both tables.
es <- read.csv("data/electoral-system.csv",
               as.is = TRUE, fileEncoding = "utf-8")
cab <- merge(x = cab, y = es, by = "country.name.short")


# Data for Stata analysis
# Copy of cab in which every dot in a column name is replaced by an
# underscore, written as CSV with missing values as empty fields and
# without row names.
cab.out <- setNames(cab, gsub(".", "_", names(cab), fixed = TRUE))
write.csv(
  cab.out,
  file = "graphs-tables/cabinet-lm.csv",
  na = "",
  row.names = FALSE,
  fileEncoding = "cp1252"
)


# Descriptive countries
# Per-country summary values, restricted to the columns used in the table.
cou.sum <- read.csv("data/cabinet-mean.csv", fileEncoding = "cp1252",
                    as.is = TRUE)
cou.sum <- cou.sum[, c("country.name.short", "disproportionality",
                       "left.enp", "right.enp", "left", "centre", "right")]

# Keep rows whose region is not 4 and compute, per country, the smallest
# election_date and the largest proportional value. subset() drops rows with
# a missing region; plain logical indexing (df[cond, ]) would instead insert
# an all-NA row for each of them and create a spurious NA country group.
cou.west <- ddply(subset(cab.out, region != 4), .(country_name_short),
                  summarize,
                  first = min(election_date),
                  proportional = max(proportional))
cou.app <- merge(cou.west, cou.sum,
                 by.x = "country_name_short", by.y = "country.name.short")

# Sort by proportional, then alphabetically by country.
cou.app <- cou.app[order(cou.app$proportional, cou.app$country_name_short), ]
# Keep only the first four characters of the earliest election date
# (presumably the year of a year-first date string -- confirm against data).
cou.app$first <- substr(cou.app$first, 1, 4)

write.csv(cou.app, "graphs-tables/table-3-country-info.csv",
          fileEncoding = "cp1252", na = "", row.names = FALSE)


# Documentation descriptive statistics
# doc.desc lists the documented variables (column `name`); one column per
# statistic in `funcs` is filled in below and the table is written out.
doc.desc <- read.csv("data/variables.csv", fileEncoding = "cp1252",
                     as.is = TRUE)
cab.west <- subset(cab.out, region != 4)
# drop = FALSE keeps a data frame even when only one column matches;
# otherwise the result collapses to a vector, names() is NULL and the loop
# below would silently compute nothing.
doc.var <- cab.west[, names(cab.west) %in% doc.desc$name, drop = FALSE]

funcs <- c("min", "max", "median", "mean", "sd")
for (func in funcs) {
  # Two decimal digits for SD, one otherwise.
  round.val <- if (func == "sd") 2 else 1
  for (var in names(doc.var)) {
    # Skip non-numeric columns and identifier columns (names ending in _id).
    if (!is.numeric(doc.var[[var]]) || grepl("_id$", var, perl = TRUE)) {
      next
    }
    # match.fun() resolves the name to a function only, so a data object
    # that happens to be called e.g. `max` cannot be picked up by mistake.
    value <- round(match.fun(func)(doc.var[[var]], na.rm = TRUE), round.val)
    # %in% never yields NA, so missing entries in doc.desc$name cannot break
    # the subscripted assignment the way `==` would.
    doc.desc[doc.desc$name %in% var, func] <- value
  }
}
write.csv(doc.desc, "graphs-tables/table-a1-descriptive.csv",
          fileEncoding = "cp1252", na = "", row.names = FALSE)
